################################################################################
##
## Purpose: This script creates Figure 10, along with SI figure 19.
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
## Input Files:
##  - ./data/prepped/finalData.RData: Prepped data from 9_DATA_final_build.R
##  - ./data/prepped/hearings/topic_models_100.RData: Prepped data from 7_DATA_topic_model_prep.R
##
## Output Files:
##  - ./output/figures/MS_figure_10.pdf
##  - ./output/figures/SI_figure_19.pdf (NOTE: no code in this script currently writes this file; only MS_figure_10.pdf is produced here)
##
##
## See associated log file for compute environment, package versions, 
##  and date of most recent run.
##
################################################################################
# Start from a clean workspace so objects left over from an earlier session
# cannot leak into this run, then report memory use via gc().
rm(list = ls())
gc()

# Attach dependencies with library() rather than require(): library() stops
# with an error when a package is missing, whereas require() only warns and
# returns FALSE, which would postpone the failure to the first call that
# needs the package.
library(tidyverse)
library(ggridges)
library(fixest)

# Fix the RNG state so any randomised step is reproducible across runs.
set.seed(123)

# Compute details
# Print the run date plus basic hardware / process information so the log
# records the environment that produced the figure.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))
os_name <- Sys.info()[['sysname']]
if (os_name == 'Windows') {
  # Each wmic call prints a header line first: [-1] drops it, [2] takes the
  # first value line.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # Sys.info() reports 'Linux' here; the previous comparison string was
  # misspelled, so this branch could never run.
  ps_lines <- system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE)
  # Drop the header row and split every process row on runs of whitespace.
  # ps pads its columns with a variable number of blanks, so fixed positions
  # after a single-space split are not reliable.
  splitted <- strsplit(trimws(ps_lines[-1]), "\\s+")
  df <- do.call(rbind, lapply(splitted,
                              function(x) data.frame(
                                cpu = as.numeric(x[1]),
                                mem = as.numeric(x[2]),
                                pid = as.numeric(x[3]),
                                # Everything after the pid is the command line.
                                cmd = paste(x[-(1:3)], collapse = " "))))
  # Print explicitly so the table also appears when the script is source()d.
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# R version, platform and attached package versions for the log.
print(sessionInfo())



# Loading Data
# load() restores the saved objects into the workspace; the code below relies
# on `utterance_level` and `lda_model` being among them.
load("./data/prepped/finalData.RData")
load("./data/prepped/hearings/topic_models_100.RData")

# Top `n_words` words per topic as a tibble: data.frame()'s default X<n>
# column names become topic_<n>, and `top_word` holds the row number.
top_words_tbl <- function(n_words) {
  lda_model$get_top_words(n = n_words) %>%
    data.frame() %>%
    rename_all(function(nm) gsub('X','topic_',nm)) %>%
    mutate(top_word = row_number()) %>%
    as_tibble()
}

# Top 50 words per topic, used later to label topics in the figure.
topWord <- top_words_tbl(50)

# Transposed top-10 table (one row per column of the tibble), shown for
# inspection in the log.
t(top_words_tbl(10))

# Topic numbers left out of the `substantive` set built below (presumably the
# topics hand-coded as non-substantive -- confirm against the topic model).
insubs <- c(
  1, 3, 5, 9, 10, 11, 12, 19, 20, 22,
  24, 27, 29, 31, 37, 39, 41, 42, 46, 47,
  49, 51, 54, 55, 57, 69, 71, 76, 83, 85,
  88, 90, 91, 92, 93, 96, 97
)

# Column names ('topic_<n>') of every topic from 1 to 100 not listed above.
keep_ids <- setdiff(1:100, insubs)
substantive <- paste0('topic_', keep_ids)


# Build the topic-by-speaker table that the figure is drawn from.
#
#  1. Keep rows with ind > mind and collapse speakers into 'Yellen',
#     'Male Fed Chairs' (any other opensecretsID containing 'FED') and
#     'Others'.
#  2. Stack the topic_<n> columns and keep, for every fullInd/interrupted
#     group, the single row with the largest theta.
#  3. Summarise by topic and speaker group, then add per-speaker totals.
#  4. Attach each topic's first five top words as a text label.

# Label for each topic: its first five rows of topWord, comma separated.
# The integer top_word column is dropped first so that it is not stacked as
# if it were a topic.
topicLabels <- topWord %>%
  select(-top_word) %>%
  gather(topic,term) %>%
  group_by(topic) %>%
  slice(1:5) %>%
  summarise(terms = paste(term,collapse = ', '))

toplot <- utterance_level %>%
  filter(ind > mind) %>%
  mutate(tmpSpk = ifelse(grepl('FED',opensecretsID),as.character(opensecretsID),'Others')) %>%
  mutate(tmpSpk = ifelse(grepl('YELLEN',tmpSpk),'Yellen',
                         ifelse(grepl('FED',tmpSpk),'Male Fed Chairs','Others'))) %>%
  arrange(fullInd) %>%
  select(fullInd,interrupted,nchars,tmpSpk,matches('topic_\\d+$')) %>%
  # gather() is kept here on purpose: it fixes the row order that the
  # slice(1) tie-break below depends on.
  gather(topic,theta,-fullInd,-interrupted,-nchars,-tmpSpk) %>%
  group_by(fullInd,interrupted) %>%
  filter(theta == max(theta)) %>%
  slice(1) %>%                       # first row wins when theta is tied
  ungroup() %>%
  arrange(fullInd) %>% distinct() %>%
  group_by(topic,tmpSpk) %>%
  # int has no na.rm, so any NA in `interrupted` yields NA for that cell.
  summarise(int = mean(interrupted),
            meanTheta = mean(theta,na.rm = TRUE),
            meanChars = mean(nchars,na.rm = TRUE),
            n = n(),
            .groups = 'drop') %>%
  group_by(tmpSpk) %>%
  # tot: rows per speaker group; avgInt: unweighted mean of the topic-level
  # interruption rates for that speaker group.
  mutate(tot = sum(n),
         avgInt = mean(int)) %>%
  ungroup() %>%
  mutate(share = n/tot) %>%
  left_join(topicLabels,by = 'topic')

# Topics sorted by Yellen's interruption rate; this order defines the factor
# levels and therefore the vertical order in the figure.
yellenOrder <- toplot %>%
  filter(tmpSpk == 'Yellen') %>%
  arrange(int)

toplot <- toplot %>%
  mutate(terms = factor(terms,levels = yellenOrder$terms),
         topic = factor(topic,levels = yellenOrder$topic)) %>%
  # factor() turns labels that are not among Yellen's levels into NA, so this
  # drops topics with no Yellen row; 'Others' is not plotted.
  filter(!is.na(terms),
         tmpSpk != 'Others')

# Figure 10: per-topic interruption rate for Yellen vs. male Fed chairs.
# Points are sized by each topic's share of the speaker group's rows, vertical
# lines mark each group's average rate, and a segment joins the two groups'
# rates within a topic (orange where Yellen's rate is higher, black otherwise).

# One row per speaker group with its average interruption rate.
avgIntBySpk <- toplot %>% select(tmpSpk,avgInt) %>% distinct()

# One row per topic with a column of interruption rates for each speaker
# group; built once and reused by the segment and label layers.
intWide <- toplot %>%
  select(topic,terms,tmpSpk,int) %>%
  pivot_wider(names_from = tmpSpk,values_from = int)

fig10 <- toplot %>%
  ggplot(aes(x = int,y = topic,size = share,fill = tmpSpk,shape = tmpSpk)) + 
  geom_point(alpha = 1,size = 1) + 
  geom_vline(data = avgIntBySpk,aes(xintercept = avgInt,color = tmpSpk)) +
  scale_size_continuous(name = '% of Utterances',range = c(1,10),labels = scales::percent) +
  theme_ridges() + 
  scale_shape_manual(guide = 'none',name = 'Speaker',values = 21:25) + 
  # Line thickness is given as linewidth (as in element_line() below); size
  # is deprecated for line geoms.
  geom_segment(data = intWide %>% filter(Yellen > `Male Fed Chairs`),
               aes(x = Yellen,y = topic,xend = `Male Fed Chairs`,yend = topic),
               linewidth = .5,color = 'darkorange',inherit.aes = FALSE) + 
  geom_segment(data = intWide %>% filter(Yellen <= `Male Fed Chairs`),
               aes(x = Yellen,y = topic,xend = `Male Fed Chairs`,yend = topic),
               linewidth = .5,color = 'black',inherit.aes = FALSE) + 
  geom_point(alpha = .35) + 
  # Text labels only for topics in `substantive` where Yellen's rate is higher.
  geom_text(data = intWide %>%
              filter(topic %in% substantive) %>%
              filter(Yellen > `Male Fed Chairs`),
            aes(x = Yellen,y = topic,label = terms),inherit.aes = FALSE,
            size = 2,hjust = -.1) + 
  scale_fill_manual(guide = 'none',name = 'Speaker',values = c('black','darkorange')) + 
  scale_x_continuous(breaks = c(0,.25,.5,.75,1),labels = scales::percent) + 
  theme(legend.position = c(.7,.15),
        axis.title.x = element_text(hjust = .6),
        panel.grid.major.x = element_line(linewidth = .1),
        panel.grid.major.y = element_blank(),
        legend.title = element_text(size = 10),legend.text = element_text(size = 9),
        axis.text.y = element_blank()) + 
  xlab('Proportion Interrupted') + ylab('Topic') + 
  scale_color_manual(guide = 'none',name = 'Speaker',values = c('black','darkorange')) + 
  scale_linetype_discrete(guide = 'none',name = 'Speaker') + 
  # Speaker names next to their average lines; y = c(100,102) supplies one
  # height per row of avgIntBySpk, so it expects exactly two speaker groups.
  geom_text(data = avgIntBySpk,
            aes(x = avgInt,y = c(100,102),label = tmpSpk,color = tmpSpk),
            inherit.aes = FALSE,hjust = 0,size = 3.5,show.legend = FALSE) +
  coord_cartesian(ylim = c(0,103),xlim = c(0,1.5),clip = 'off')

pdf('./output/figures/MS_figure_10.pdf',width = 7,height = 8)
# Draw explicitly: a ggplot object is only auto-printed at top level of an
# interactive/Rscript run, not when the script is source()d.
print(fig10)
dev.off()

# EOF